import concurrent.futures
import os

def file_name(file_dir):
    """Return the names of the files located directly inside *file_dir*.

    Only bare file names (no directory part) are returned, so they are meant
    to be joined onto *file_dir* by the caller. Files inside sub-directories
    are not included, because their names alone could not be resolved
    against *file_dir*.

    Args:
        file_dir: Path of the directory to list.

    Returns:
        A new list of file names. It is empty when the directory contains no
        files, or when ``os.walk`` yields nothing for it (for example because
        the directory does not exist or cannot be read).
    """
    # os.walk is top-down by default, so the first tuple it yields describes
    # file_dir itself; returning from inside the loop stops the walk before
    # it descends into any sub-directory.
    for _root, _dirs, files in os.walk(file_dir):
        return list(files)
    # The walk produced nothing at all: report "no files" instead of failing
    # on an unbound loop variable.
    return []
def run(path, number, maker):
    """Sample up to *number* records per key from the text files in *path*.

    Every file name returned by ``file_name(path)`` is opened as UTF-8 text
    and read line by line. Each line is split on *maker*: the text after the
    last *maker* is the grouping key and the text before the first *maker*
    is the stored value (for e-mail addresses split on "@", the key is the
    domain and the value is the local part). At most *number* values are
    kept per key, in the order they are encountered.

    The kept records are then appended to ``output1.txt`` inside *path*,
    one ``value@key`` record per line.

    Args:
        path: Directory holding the input files; also receives the output.
        number: Maximum number of values kept per key. Zero or a negative
            number keeps nothing.
        maker: Separator string used to split each line.

    Raises:
        ValueError: If *maker* is empty (``str.split`` cannot split on it).
    """
    if not maker:
        raise ValueError("maker must be a non-empty string")

    output_name = "output1.txt"
    textdict = {}
    for name in file_name(path):
        # The output of an earlier run lives in the same directory; reading
        # it back would feed previous results into the new sample.
        if name == output_name:
            continue
        print(name)
        try:
            with open(os.path.join(path, name), "r", encoding="utf-8") as reader:
                # Iterate lazily so that very large files are never loaded
                # into memory as a whole.
                for line in reader:
                    # Drop the line terminator so that the key is the same
                    # whether or not the last line of a file ends in one.
                    parts = line.rstrip("\r\n").split(maker)
                    if len(parts) < 2:
                        # No separator on this line (blank or malformed).
                        continue
                    bucket = textdict.setdefault(parts[-1], [])
                    if len(bucket) < number:
                        bucket.append(parts[0])
        except (OSError, UnicodeDecodeError) as exc:
            # Best effort: report the unreadable file and move on. Records
            # read from it before the error are kept.
            print("skipped", name, exc)
            continue

    print("开始写入")
    # Open the output once instead of once per record; append mode keeps
    # whatever earlier runs already wrote.
    with open(os.path.join(path, output_name), "a", encoding="utf-8") as writer:
        for key, values in textdict.items():
            writer.writelines(value + "@" + key + "\n" for value in values)


# Script entry: process the address dump directory below, splitting records
# on "@" and keeping up to 10 values per key.
# NOTE(review): this call is unguarded, so it also runs when the module is
# imported.
run(
    "C:\\Users\\qiyanxin\\Documents\\分享一亿两千万个邮箱地址库\\1亿2仟万邮址",
    10,
    "@",
)












# Disabled snippet kept as a bare string literal: it is evaluated as a no-op
# expression and never executed. The code inside reads every line of mail.txt,
# collects the lines in a set to drop duplicates, prints a progress figure
# based on a fixed 5000-line total, and writes the unique lines to mailquc.txt.
'''
filename = "C:\\caoli\\pycharm\\project1\\huangkang\\url_email\\mail.txt"
textset = set()

with open(filename,"r",encoding="utf-8") as r:
    r = r.readlines()
    for i in range(len(r)):
        #r[i] = r[i].split("@")
        print(i/5000*100,"%")
        textset.add(r[i])


with open("mailquc.txt","w",encoding="utf-8") as w:
    w.writelines(textset)
'''